This page shows plots of the COVID-19 case rate per 100k population by state since the start of the epidemic.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(naniar)

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Default figure options applied to every chunk in this document.
knitr::opts_chunk$set(
  fig.width = 8,      # figure width
  fig.asp = 0.8,      # aspect ratio (height / width)
  out.width = "90%"   # width of the figure in the rendered output
)

Total Covid Case Rate Per 100k By State

First, let’s read in the CDC COVID-19 case data by state, covering the whole period up to Thu Nov 18 2021.

[The data generated: Thu Nov 18 2021 22:02:06 GMT-0500 (EST)]

# Read the CDC table of total cases by state/territory. The first two lines
# of the file are skipped before the header row, and the column names are
# then normalised by janitor::clean_names().
covid_total_by_state =
  janitor::clean_names(
    read_csv(
      "./data/united_states_covid19_cases_deaths_and_testing_by_state.csv",
      skip = 2
    )
  )
## Rows: 62 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (25): State/Territory, Level of Community Transmission, Total Cases, Con...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

The original dataset marks missing values with “N/A”, so we replace them with NA and convert the case-rate variable to numeric.

# marker the raw file uses for missing values
na_strings = "N/A"

# Turn the "N/A" markers into NA, then keep only the state/territory name
# (renamed to `region`) and the case rate per 100k (converted to numeric).
covid_total_by_state_tidy =
  covid_total_by_state %>%
  replace_with_na_all(condition = ~.x %in% na_strings) %>%
  select(region = state_territory, case_rate_per_100000) %>%
  mutate(
    # stored as a factor; the "New York*" level is relabelled "New York"
    region = recode(as.factor(region), "New York*" = "New York"),
    case_rate_per_100000 = as.numeric(case_rate_per_100000)
  )

Let’s make a choropleth map to show the total COVID-19 case rate per 100k by state in the US.

  1. Load the US postal code dataset.
# Lookup table of states and their postal codes, with cleaned column names
postal_code = janitor::clean_names(read_csv("./data/us_postal_code.csv"))
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): State, Abbrev, Code
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach each state's case rate to the postal-code table (all postal-code
# rows are kept) and move `code` to the first column.
plot_df =
  postal_code %>%
  left_join(covid_total_by_state_tidy, by = c("state" = "region")) %>%
  select(code, everything())

  2. Draw the map with plotly.

# Map options: limit the scope to the USA, use the Albers USA projection,
# and draw lakes in white.
g = list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)

# Hover text: the state name followed by a line break
plot_df = mutate(plot_df, hover = paste(state, "<br>"))

# Choropleth of the cumulative case rate, located by state postal code
map_plotly1 =
  plot_df %>%
  plot_geo(locationmode = "USA-states") %>%
  add_trace(
    locations = ~code,
    z = ~case_rate_per_100000,
    color = ~case_rate_per_100000,
    colors = "Purples",
    text = ~hover
  ) %>%
  colorbar(title = "Case Rate Per 100K") %>%
  layout(
    title = "COVID-19 Case Rate by State/Territory (cases per 100,000)",
    geo = g
  )

Total Covid Case Rate Per 100k By State in 2020

The following data frame holds the COVID-19 case rate per 100k up to Dec 31, 2020.

# Read one per-state CSV file.
#
# file_df: path of the file to read.
# Returns the result of read_csv() after skipping the first two lines.
read_data_function = function(file_df) {
  read_csv(file_df, skip = 2)
}

# Build one row per file found in ./data/state_covid_separate, holding the
# file name, its relative path, and the table read from that file.
state_2020 =
  tibble(
    files = list.files("./data/state_covid_separate")
  ) %>%
  mutate(
    # file.path() is vectorised, so `path` is a plain character column
    # rather than a list-column of length-one strings
    path = file.path("./data/state_covid_separate", files),
    # one data frame per file, stored as a list-column
    observations = map(path, read_data_function)
  )

Let’s unnest the data frame and keep only the rows for Dec 31, 2020.

# Expand the nested per-file tables into rows, normalise the column names,
# then keep the "Dec 31 2020" rows and the columns needed for the map.
state_2020_tidy =
  janitor::clean_names(unnest(state_2020, cols = observations)) %>%
  filter(date == "Dec 31 2020") %>%
  select(state, total_cases, total_case_rate_per_100k)

Make the map plot.

# Add postal codes to the 2020 figures; every row of state_2020_tidy is kept
plot_2020_df =
  state_2020_tidy %>%
  left_join(postal_code, by = "state")

# Map options: limit the scope to the USA, use the Albers USA projection,
# and draw lakes in white.
g = list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)

# Hover text: state name, then the total case count on its own line
plot_2020_df = mutate(
  plot_2020_df,
  hover = paste(state, "<br>", "Total case", total_cases, "<br>")
)

# Choropleth of the case rate as of the end of 2020, located by postal code
map_plotly2 =
  plot_2020_df %>%
  plot_geo(locationmode = "USA-states") %>%
  add_trace(
    locations = ~code,
    z = ~total_case_rate_per_100k,
    color = ~total_case_rate_per_100k,
    colors = "Purples",
    text = ~hover
  ) %>%
  colorbar(title = "Case Rate Per 100K") %>%
  layout(
    title = "2020 COVID-19 Case Rate by State/Territory (cases per 100,000)",
    geo = g
  )